import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_score

# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei'] 
plt.rcParams['axes.unicode_minus'] = False

# 生成模拟数据
X, y_true = make_blobs(n_samples=300, centers=4, 
                      cluster_std=0.60, random_state=42)

# K-means聚类
kmeans = KMeans(n_clusters=4, random_state=42)
y_pred = kmeans.fit_predict(X)

# 评估聚类效果
silhouette_avg = silhouette_score(X, y_pred)
print(f"轮廓系数: {silhouette_avg:.3f}")

# 可视化结果
plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.scatter(X[:, 0], X[:, 1], c=y_true, cmap='viridis', s=50)
plt.title('真实标签')
plt.xlabel('特征1')
plt.ylabel('特征2')

plt.subplot(1, 2, 2)
plt.scatter(X[:, 0], X[:, 1], c=y_pred, cmap='viridis', s=50)
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
           marker='x', s=200, linewidths=3, color='red')
plt.title('K-means聚类结果')
plt.xlabel('特征1')
plt.ylabel('特征2')

plt.tight_layout()
plt.show()